# Chunk defaults for knitting the codebook: show everything in the rendered
# document so problems are visible.
knitr::opts_chunk$set(
  echo = TRUE,     # print the R code of each chunk
  message = TRUE,  # keep messages raised during codebook generation
  warning = TRUE,  # keep warnings raised during codebook generation
  error = TRUE     # report errors in the output instead of interrupting the
                   # knit; usually better for debugging
)

# Use the black-and-white theme as the default for all ggplot2 figures.
ggplot2::theme_set(ggplot2::theme_bw())

# Do not split wide pander tables.
pander::panderOptions("table.split.table", Inf)
# load libraries
library(codebook)
library(here)
library(dplyr)
library(tidyverse)
library(future)
library(labelled)
This is a data dictionary for the dataset used in the paper “Cognates are advantaged in early bilingual expressive vocabulary development”.
# Load both public data files; here::here() builds each path relative to the
# project root and rio::import() reads the CSV.
keepers_cognate_full <-
  here::here("data_keepers/public_keepers_cognate_full.csv") %>%
  rio::import()
keepers_cognate_matched <-
  here::here("data_keepers/public_keepers_cognate_matched.csv") %>%
  rio::import()

# Print the item-level codebook table for each data frame.
codebook_items(keepers_cognate_full)
codebook_items(keepers_cognate_matched)
The dataset included N = 47 participants.
# Count unique participants and unique administrations. An administration is
# identified by the combination of subject_id and n_months.
keepers_cognate_full %>%
  summarise(
    N_subject_id = n_distinct(subject_id),
    N_administration = n_distinct(str_c(subject_id, "_", n_months))
  )
## N_subject_id N_administration
## 1 47 219
In the final analysis, we only kept data where both the English and French forms were completed within the same month.
# Keep one row per administration (subject_id x n_months), then report the
# length of each completion column.
# NOTE(review): length() counts rows regardless of the values stored, so all
# three figures equal the number of distinct administrations. If the intent is
# to count completed forms, this needs a sum over a completion indicator;
# confirm how the completed_* columns are coded.
keepers_cognate_full %>%
  mutate(administration = str_c(subject_id, "_", n_months)) %>%
  distinct(administration, .keep_all = TRUE) %>%
  summarise(
    across(
      c(completed_eng, completed_fr, completed_both),
      length,
      .names = "n_{.col}"
    )
  )
## n_completed_eng n_completed_fr n_completed_both
## 1 219 219 219
# Descriptive statistics for age in days, computed over every row of the full
# data frame. Missing ages are dropped from each statistic.
# `na.rm = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved word.
keepers_cognate_full %>%
  summarize(
    mean_age_days = mean(age_days, na.rm = TRUE),
    sd_age_days = sd(age_days, na.rm = TRUE),
    min_age_days = min(age_days, na.rm = TRUE),
    max_age_days = max(age_days, na.rm = TRUE)
  )
## mean_age_days sd_age_days min_age_days max_age_days
## 1 632.4475 77.87396 493 826
# Histogram of age in days over all rows, using ggplot2's default binning.
ggplot(keepers_cognate_full, aes(x = age_days)) +
  geom_histogram()
# Sex distribution across participants: keep the first row of each
# subject_id, tally by sex, and express each tally as a percentage of the
# total (rounded to two decimals).
keepers_cognate_full %>%
  distinct(subject_id, .keep_all = TRUE) %>%
  count(sex) %>%
  mutate(percentage = round(prop.table(n) * 100, digits = 2))
## sex n percentage
## 1 Female 24 51.06
## 2 Male 21 44.68
## 3 Other 2 4.26
# Vocabulary-size descriptives per administration.
# Keep one row per administration (subject_id x n_months), stack every column
# whose name matches "total_" into long format, and summarise each of those
# columns separately. Missing values are dropped from each statistic.
# `na.rm = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned; the redundant c() around matches() is removed.
keepers_cognate_full %>%
  mutate(administration = str_c(subject_id, "_", n_months)) %>%
  distinct(administration, .keep_all = TRUE) %>%
  pivot_longer(matches("total_"), names_to = "type", values_to = "n_words") %>%
  group_by(type) %>%
  summarize(
    mean_n_words = mean(n_words, na.rm = TRUE),
    sd_n_words = sd(n_words, na.rm = TRUE),
    min_n_words = min(n_words, na.rm = TRUE),
    max_n_words = max(n_words, na.rm = TRUE)
  )
## # A tibble: 3 × 5
## type mean_n_words sd_n_words min_n_words max_n_words
## <chr> <dbl> <dbl> <int> <int>
## 1 total_eng_vocab 94.1 119. 0 601
## 2 total_fr_vocab 87.3 90.3 0 527
## 3 total_vocabulary 181. 186. 0 845
# Full data: number of word_pairs rows for a single example administration
# (subject 53279 at n_months == 1).
keepers_cognate_full %>%
  filter(subject_id == 53279, n_months == 1) %>%
  summarise(n = length(word_pairs))
## n
## 1 537
# Full data: rows per cognate_status for the same example administration
# (subject 53279 at n_months == 1).
keepers_cognate_full %>%
  filter(subject_id == 53279, n_months == 1) %>%
  group_by(cognate_status) %>%
  summarise(n = n())
## # A tibble: 2 × 2
## cognate_status n
## <chr> <int>
## 1 cognate 131
## 2 non-cognate 406
# Full data: length of the English and French item columns for the example
# administration (subject 53279 at n_months == 1).
keepers_cognate_full %>%
  filter(subject_id == 53279, n_months == 1) %>%
  summarise(
    eng_n = length(english_item),
    fr_n = length(french_item)
  )
## eng_n fr_n
## 1 537 537
# Matched data: number of word_pairs rows for a single example administration
# (subject 53279 at n_months == 1).
keepers_cognate_matched %>%
  filter(subject_id == 53279, n_months == 1) %>%
  summarise(n = length(word_pairs))
## n
## 1 162
# Matched data: rows per cognate_status for the same example administration
# (subject 53279 at n_months == 1).
keepers_cognate_matched %>%
  filter(subject_id == 53279, n_months == 1) %>%
  group_by(cognate_status) %>%
  summarise(n = n())
## # A tibble: 2 × 2
## cognate_status n
## <chr> <int>
## 1 cognate 81
## 2 non-cognate 81
# Matched data: length of the English and French item columns for the example
# administration (subject 53279 at n_months == 1).
keepers_cognate_matched %>%
  filter(subject_id == 53279, n_months == 1) %>%
  summarise(
    eng_n = length(english_item),
    fr_n = length(french_item)
  )
## eng_n fr_n
## 1 162 162